ETL Work Flow Diagram:

NYPD Crime Data

Is extracted from New York City Open Data Portal. Is a csv file of all prosecuted crimes in New York City from 2006 to the present:

# Data extraction of NYPD Crime Data
download.file(url = "https://data.cityofnewyork.us/resource/8h9b-rp9u.csv",
              destfile = "NYPD_Arrests_Data__Historic_.csv")
nypd_crimes = read.csv("NYPD_Arrests_Data__Historic_.csv")

Processing of NYPD Crime Data

Next, we will select on the variables needed for analysis from the data set and select observations within the variable ranges for this analysis (Non-violent crimes committed between 2018-2019):

# data subset with only necessary fields
nypd_crimes_n = select(nypd_crimes, -c("arrest_key", "pd_cd", "pd_desc", 
                                       "ky_cd", "law_code", 
                                       "arrest_boro", "arrest_precinct",
                                       "age_group", "perp_sex"))
# format arrest_date as usable date variable
dates = c(nypd_crimes_n$arrest_date)
nypd_crimes_n$arrest_date = as.Date(dates, format = "%Y-%m-%d")
nypd_crimes_n$year = format(nypd_crimes_n$arrest_date, "%Y")

# select entries that fit project analysis parameters (crimes between 2018 
#and 2019, misdemenors and violations)
nypd_crimes_w = filter(nypd_crimes_n, year >= 2018) %>%
  filter(law_cat_cd == "M" | law_cat_cd == "V")

Census Data Wrangling

Using the data from tidy census, the demographic data for the census boundary will be processed.

# Finding correct variables
v20 = load_variables(2020, "pl")
view(v20)

#Selecting variables and counties
vars = paste0(c(total = "P1_001N", 
                total_white = "P1_003N", 
                total_black = "P1_004N", 
                total_native = "P1_005N", 
                total_asian = "P1_006N", 
                total_pacisla = "P1_007N", 
                total_other = "P1_008N", 
                total_hisplat= "P2_002N"))
counties = c("New York", "Kings", "Bronx", "Richmond", "Queens")

#Total populations by race in each census tract in each county in New York City

total_pop_20 = get_decennial(
  geography = "tract",
  variables = vars,
  state = "New York",
  county = counties,
  year = 2020
)
## Getting data from the 2020 decennial Census
## Using the PL 94-171 Redistricting Data summary file
## Note: 2020 decennial Census data use differential privacy, a technique that
## introduces errors into data to preserve respondent confidentiality.
## ℹ Small counts should be interpreted with caution.
## ℹ See https://www.census.gov/library/fact-sheets/2021/protecting-the-confidentiality-of-the-2020-census-redistricting-data.html for additional guidance.
## This message is displayed once per session.

Retrieving Boundary Shapefiles for Census Tracts

# Retrieving Boundary Files
nyc_tracts = tracts("New York", counties, cb = TRUE)
## Retrieving data for the year 2020
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |                                                                      |   1%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |================                                                      |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  61%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |====================================================================  |  98%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================| 100%
# View of NYC Census Tract Boundaries
mapview(nyc_tracts)